Brian M. Schilder, Bioinformatician II
# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 2$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] 500
##
## $resolution
## [1] 0.6
##
## $resultsPath
## [1] "./Results"
##
## $nCores
## [1] 2
##
## $perplexity
## [1] 30
# load(file.path(resultsPath, "scRNAseq_results.RData"))
load(file.path("Results","subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4","scRNAseq_results.RData"))library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr)
library(data.table)
library(monocle) # BiocManager::install("monocle")
# BiocManager::install(c('DelayedArray', 'DelayedMatrixStats', 'org.Hs.eg.db', 'org.Mm.eg.db'))
library(org.Hs.eg.db)
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
library(enrichR) #BiocManager::install("enrichR")# Convert Seurat objt to CDS object
mDAT <- monocle::importCDS(DAT, import_all = T)
# generate size factors for normalization later
mDAT <- DESeq2::estimateSizeFactors(mDAT)
# Get pre-trained PBMC classifer
load(file.path(root, "Data/Garnett/hsPBMC")) # Download from: https://cole-trapnell-lab.github.io/garnett/classifiers/hsPBMC
mDAT <- garnett::classify_cells(mDAT, hsPBMC,
db = org.Hs.eg.db,
cluster_extend = TRUE,
cds_gene_id_type = "SYMBOL")## Warning in doTryCatch(return(expr), name, parentenv, handler): The
## following genes used in the classifier are not present in the input CDS.
## Interpret with caution. ENSG00000174059The following genes used in the
## classifier are not present in the input CDS. Interpret with caution.
## ENSG00000007062The following genes used in the classifier are not present
## in the input CDS. Interpret with caution. ENSG00000157404The following
## genes used in the classifier are not present in the input CDS. Interpret
## with caution. ENSG00000185291
table(pData(mDAT)$cell_type)##
## B cells CD34+ CD4 T cells CD8 T cells
## 13 92 3 1
## Dendritic cells Monocytes NK cells T cells
## 280 15025 801 16
## Unknown
## 2913
table(pData(mDAT)$cluster_ext_type) ##
## B cells CD34+ CD4 T cells Dendritic cells
## 12 17 3 165
## Monocytes NK cells T cells Unknown
## 17165 701 15 1066
# Get feature genes for each cell type
feature_genes <- garnett::get_feature_genes(classifier = hsPBMC,
node = "root",
db = org.Hs.eg.db,
convert_ids = F)
head(feature_genes) ## B cells CD34+ Dendritic cells Monocytes
## (Intercept) -0.884989922 0.47694427 -0.124629734 -0.0301907756
## ENSG00000196154 -0.005499815 -0.01088859 -0.007978714 0.0056365451
## ENSG00000019582 0.030146634 -0.01959215 0.015232191 0.0001883507
## ENSG00000156738 2.193353690 -0.66191294 -1.104899464 -0.7393680112
## ENSG00000177455 2.056641938 -0.71851810 -1.941859710 -1.0448682467
## ENSG00000105369 1.800296145 -0.53835614 0.063380804 -0.1973728377
## NK cells T cells Unknown
## (Intercept) -0.911657164 0.524757370 0.949765956
## ENSG00000196154 -0.001627233 -0.005835858 0.026193667
## ENSG00000019582 -0.005880096 -0.023473024 0.003378099
## ENSG00000156738 -0.096566144 -0.602417257 1.011810124
## ENSG00000177455 0.297279508 0.177790907 1.173533704
## ENSG00000105369 -0.618567693 -0.963141845 0.453761563
# Convert back to Seurat object & save results
DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type")])
write.csv(DAT@meta.data, file.path(resultsPath, "garnett_results.csv"), quote = F,row.names = F)markerList <- c("CD14", "FCGR3A")
markerData <- DAT@scale.data[row.names(DAT@scale.data) %in% markerList,] %>% t() %>% data.frame()
## Efficiently merge using data.table
dt1 <- data.table(markerData, keep.rownames = "Cell", key = "Cell") %>% copy()
dt2 <- data.table(DAT@meta.data[c("cell_type","post_clustering")], keep.rownames = "Cell", key = "Cell") %>% copy()
markerDT <- dt1[dt2]
ggplot(data = markerDT, aes(x=CD14, y=FCGR3A) ) +
stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="purple", bins = 100, size=.1) +
geom_point(aes(color=as.factor(cell_type)), shape=16, stroke=0, size=2, alpha=.1) +
guides(colour = guide_legend(title="cell_type",override.aes = list(alpha = 1))) tSNE_metadata_plot <- function(var, labSize=12){
# Metadata plot
p1 <- Seurat::TSNEPlot(DAT, do.return = T, do.label = T, group.by = var,label.size = labSize,
plot.title=paste(var), vector.friendly=T) +
theme(legend.position = "top") +
scale_color_brewer( palette = "Dark2", aesthetics = aes(alpha=.5))
# t-SNE clusters plot
p2 <- Seurat::TSNEPlot(DAT, do.return = T, do.label = T,label.size = labSize,
plot.title=paste("Unsupervised Clusters"), vector.friendly=T) +
theme(legend.position = "top") +
scale_color_brewer( palette = "Set1", aesthetics = aes(alpha=.5))
print(cowplot::plot_grid(p1,p2))
} tSNE_metadata_plot("cell_type")tSNE_metadata_plot("cluster_ext_type")tSNE_metadata_plot("mut")FeaturePlot(DAT,features.plot = c("CD14","FCGR3A", "LRRK2", "GBA"),
cols.use = c("grey","purple","magenta"), dark.theme = T, nCol = 2 )FeatureHeatmap(DAT, c("CD14","FCGR3A","LRRK2", "GBA"), pt.size = 0.5,
cols.use = c("grey","purple"), plot.horiz =T) # library("BiocParallel")
# register(MulticoreParam(4))
# MonocyteSubtype.markers <- FindMarkers(DAT, min.pct = 0.25,
# ident.1 = 2, ident.2 = c(0:1), test.use = "DESeq2",
# print.bar = T, only.pos = F, parallel=TRUE
# )
MonocyteSubtype.markers <- FindMarkers(DAT, min.pct = 0.25, only.pos = F,
ident.1 = 1, ident.2 = 0, test.use = "wilcox"
)
createDT(MonocyteSubtype.markers, caption="DEGs between cluster 0 (CD16- monocytes) and cluster 1 (CD16+ monocytes")print(x = head(x = MonocyteSubtype.markers, n = 5))## p_val avg_logFC pct.1 pct.2 p_val_adj
## FCGR3A 0 2.008798 0.762 0.064 0
## RHOC 0 1.418097 0.540 0.064 0
## CDKN1C 0 1.318540 0.406 0.024 0
## IFITM3 0 1.241075 0.827 0.358 0
## MS4A7 0 1.219344 0.615 0.153 0
write.csv(MonocyteSubtype.markers, file.path(resultsPath, "MonocyteSubtype.markers.csv"), quote = F,row.names = T)enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
"GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018",
"Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
# createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")
geneList <- data.frame(Gene=row.names(MonocyteSubtype.markers),
Weight=scales::rescale(length(MonocyteSubtype.markers$p_val_adj):1))
results <- enrichr(genes = geneList, databases = enrichr_dbs ) Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.
for (db in enrichr_dbs){
cat('\n')
cat("### ",db,"\n")
# res <- subset(results[[db]], Adjusted.P.value<=0.05)
createDT_html(results, paste("Enrichr Results:",db))
cat('\n')
} List of 7 $ KEGG_2018 :‘data.frame’: 20 obs. of 9 variables: ..$ Term : chr [1:20] “Arachidonic acid metabolism sce00590” “Glutathione metabolism sce00480” “Longevity regulating pathway - multiple species sce04213” “Peroxisome sce04146” … ..$ Overlap : chr [1:20] “1/6” “1/26” “1/37” “1/40” … ..$ P.value : num [1:20] 0.0605 0.2371 0.3197 0.3407 0.789 … ..$ Adjusted.P.value : int [1:20] 1 1 1 1 1 1 1 1 1 1 … ..$ Old.P.value : num [1:20] 0.0152 0.0575 0.0801 0.0862 0.2845 … ..$ Old.Adjusted.P.value: num [1:20] 0.304 0.431 0.431 0.431 0.881 … ..$ Z.score : num [1:20] -2.22 -1.8 -1.57 -1.52 -1.44 … ..$ Combined.Score : num [1:20] 6.24 2.59 1.79 1.64 0.34 … ..$ Genes : chr [1:20] “GPX1” “GPX1” “SOD1” “SOD1” … $ Reactome_2016 :‘data.frame’: 647 obs. of 9 variables: ..$ Term : chr [1:647] “EPHB-mediated forward signaling_Homo sapiens_R-HSA-3928662” “Sema4D induced cell migration and growth-cone collapse_Homo sapiens_R-HSA-416572” “Sema4D in semaphorin signaling_Homo sapiens_R-HSA-400685” “Fcgamma receptor (FCGR) dependent phagocytosis_Homo sapiens_R-HSA-2029480” … ..$ Overlap : chr [1:647] “7/42” “5/24” “5/27” “10/120” … ..$ P.value : num [1:647] 0.000917 0.001884 0.002661 0.00157 0.003087 … ..$ Adjusted.P.value : num [1:647] 0.28 0.28 0.28 0.28 0.28 … ..$ Old.P.value : num [1:647] 0.00307 0.0052 0.00702 0.00635 0.01024 … ..$ Old.Adjusted.P.value: num [1:647] 0.807 0.807 0.807 0.807 0.807 … ..$ Z.score : num [1:647] -2.12 -2.14 -2.12 -1.92 -1.95 … ..$ Combined.Score : num [1:647] 14.8 13.4 12.6 12.4 11.3 … ..$ Genes : chr [1:647] “LYN;CDC42;ARPC2;ARPC3;CFL1;ARPC5;ACTB” “CDC42;MYL6;RHOG;RHOC;MYL12B” “CDC42;MYL6;RHOG;RHOC;MYL12B” “LYN;CDC42;HCK;FCGR3A;ARPC2;ARPC3;CFL1;ARPC5;WASF2;ACTB” … $ GO_Biological_Process_2018 :‘data.frame’: 1623 obs. of 9 variables: ..$ Term : chr [1:1623] “neutrophil degranulation (GO:0043312)” “neutrophil mediated immunity (GO:0002446)” “negative regulation of viral entry into host cell (GO:0046597)” “neutrophil activation involved in immune response (GO:0002283)” … ..$ Overlap : chr [1:1623] “36/480” “36/488” “4/16” “36/484” … ..$ P.value : num [1:1623] 3.00e-08 3.98e-08 1.84e-05 3.46e-08 1.62e-04 … ..$ Adjusted.P.value : num [1:1623] 2.15e-05 2.15e-05 7.47e-03 2.15e-05 3.76e-02 … ..$ Old.P.value : num [1:1623] 3.39e-07 4.39e-07 7.26e-05 3.86e-07 4.27e-04 … ..$ Old.Adjusted.P.value: num [1:1623] 0.000238 0.000238 0.029454 0.000238 0.115476 … ..$ Z.score : num [1:1623] -2.02 -1.94 -2.44 -1.25 -1.89 … ..$ Combined.Score : num [1:1623] 35 33.1 26.6 21.5 16.5 … ..$ Genes : chr [1:1623] “CFD;FCN1;CSTB;GRN;SERPINA1;STXBP2;PLAC8;FTH1;COTL1;S100A12;CD14;CD36;CTSD;S100A11;CTSC;HSPA8;SERPINB1;FCER1G;RN”| truncated “CFD;FCN1;CSTB;GRN;SERPINA1;STXBP2;PLAC8;FTH1;COTL1;S100A12;CD14;CD36;CTSD;S100A11;CTSC;HSPA8;SERPINB1;FCER1G;RN”| truncated “IFITM3;FCN1;IFITM1;IFITM2” “CFD;FCN1;CSTB;GRN;SERPINA1;STXBP2;PLAC8;FTH1;COTL1;S100A12;CD14;CD36;CTSD;S100A11;CTSC;HSPA8;SERPINB1;FCER1G;RN”| truncated … $ GO_Molecular_Function_2018 :‘data.frame’: 264 obs. of 9 variables: ..$ Term : chr [1:264] “RAGE receptor binding (GO:0050786)” “Toll-like receptor binding (GO:0035325)” “transcriptional repressor activity, RNA polymerase II activating transcription factor binding (GO:0098811)” “RNA polymerase II distal enhancer sequence-specific DNA binding (GO:0000980)” … ..$ Overlap : chr [1:264] “4/10” “3/10” “3/13” “4/53” … ..$ P.value : num [1:264] 0.000124 0.004542 0.007714 0.017518 0.017919 … ..$ Adjusted.P.value : num [1:264] 0.0328 0.5996 0.6788 0.7885 0.7885 … ..$ Old.P.value : num [1:264] 0.00038 0.00788 0.01227 0.02582 0.02562 … ..$ Old.Adjusted.P.value: num [1:264] 0.1 0.957 0.957 0.957 0.957 … ..$ Z.score : num [1:264] -3 -2.31 -2.03 -2.32 -2.08 … ..$ Combined.Score : num [1:264] 26.94 12.44 9.87 9.36 8.35 … ..$ Genes : chr [1:264] “HMGB2;S100A12;S100A9;S100A8” “CD36;S100A9;S100A8” “JUN;FOS;KLF4” “JUN;SPI1;H2AFZ;ACTB” … $ GO_Cellular_Component_2018 :‘data.frame’: 173 obs. of 9 variables: ..$ Term : chr [1:173] “autolysosome (GO:0044754)” “secretory granule lumen (GO:0034774)” “vacuolar lumen (GO:0005775)” “ficolin-1-rich granule lumen (GO:1904813)” … ..$ Overlap : chr [1:173] “2/9” “24/318” “12/162” “11/124” … ..$ P.value : num [1:173] 3.66e-03 1.21e-06 1.52e-03 1.86e-03 4.15e-04 … ..$ Adjusted.P.value : num [1:173] 0.07912 0.00021 0.04372 0.04586 0.02392 … ..$ Old.P.value : num [1:173] 1.02e-02 7.24e-05 1.03e-02 1.05e-02 3.34e-03 … ..$ Old.Adjusted.P.value: num [1:173] 0.2269 0.0125 0.2269 0.2269 0.1927 … ..$ Z.score : num [1:173] -3.41 -1.31 -2.17 -2.17 -1.51 … ..$ Combined.Score : num [1:173] 19.2 17.8 14.1 13.7 11.8 … ..$ Genes : chr [1:173] “FTH1;FTL” “CFD;FCN1;HSPA8;CSTB;GRN;SERPINB1;SERPINA1;RNASET2;ARPC5;LYZ;PLAC8;BIN2;NPC2;COTL1;S100A12;MNDA;PTPN6;TIMP1;S100”| truncated “PLAC8;HSPA8;GRN;VCAN;NPC2;RNASET2;NAAA;MNDA;LYZ;CTSD;CTSC;FTL” “CFD;FCN1;HSPA8;CSTB;SERPINA1;BIN2;FTH1;COTL1;MNDA;ARPC5;CTSD” … $ Rare_Diseases_AutoRIF_ARCHS4_Predictions:‘data.frame’: 2819 obs. of 9 variables: ..$ Term : chr [1:2819] “Infantile_histiocytoid_cardiomyopathy” “Berylliosis” “Acute_hemorrhagic_leukoencephalitis” “Cerebral_dysgenesis_neuropathy_ichthyosis_and_palmoplantar_keratoderma_syndrome” … ..$ Overlap : chr [1:2819] “33/200” “28/200” “25/200” “30/200” … ..$ P.value : num [1:2819] 2.50e-14 2.27e-08 2.27e-08 2.27e-08 2.74e-09 … ..$ Adjusted.P.value : num [1:2819] 7.04e-11 7.11e-06 7.11e-06 7.11e-06 2.58e-06 … ..$ Old.P.value : num [1:2819] 1.97e-12 3.05e-07 3.05e-07 3.05e-07 4.84e-08 … ..$ Old.Adjusted.P.value: num [1:2819] 5.56e-09 9.55e-05 9.55e-05 9.55e-05 4.55e-05 … ..$ Z.score : num [1:2819] -1.79 -1.89 -1.88 -1.81 -1.6 … ..$ Combined.Score : num [1:2819] 55.9 33.3 33 31.9 31.6 … ..$ Genes : chr [1:2819] “IFITM3;FCN1;CSF3R;IFITM2;SPI1;STXBP2;LST1;LY96;CORO1A;AIF1;LILRA5;GLIPR2;COTL1;TSPO;S100A12;S100A11;LRRC25;VASP”| truncated “CD86;CEBPB;GRN;SPI1;STXBP2;MS4A7;LY96;CAPG;AIF1;CD14;CTSD;HLA-DPA1;VASP;LYN;FCER1G;LY86;RHOG;LILRB2;PILRA;TNFRS”| truncated “CD86;GRN;SPI1;STXBP2;MS4A7;CAPG;AIF1;FGD2;HLA-DMA;CD14;HLA-DPA1;VASP;LYN;FCER1G;RHOG;LILRB2;PILRA;TNFRSF1B;HLA-”| truncated “FCN1;GRN;IFITM2;SPI1;STXBP2;LILRA5;GLIPR2;COTL1;TSPO;CD14;BID;S100A11;LRRC25;VASP;GPX1;RHOG;AGTRAP;TALDO1;LILRB”| truncated … $ Human_Gene_Atlas :‘data.frame’: 50 obs. of 9 variables: ..$ Term : chr [1:50] “CD14+_Monocytes" “CD33+_Myeloid" “WholeBlood” “Tongue” … ..$ Overlap : chr [1:50] “38/383” “52/679” “35/514” “6/115” … ..$ P.value : num [1:50] 2.59e-06 2.59e-04 9.48e-05 1.17e-01 6.01e-01 … ..$ Adjusted.P.value : num [1:50] 0.000129 0.004312 0.002371 1 1 … ..$ Old.P.value : num [1:50] 9.05e-06 7.11e-04 2.64e-04 1.43e-01 6.55e-01 … ..$ Old.Adjusted.P.value: num [1:50] 0.000453 0.011844 0.006612 1 1 … ..$ Z.score : num [1:50] -2.04 -2.21 -1.92 -1.09 -1.63 … ..$ Combined.Score : num [1:50] 26.272 18.293 17.746 2.339 0.832 … ..$ Genes : chr [1:50] “CD86;FCN1;SPI1;AHNAK;CEBPD;STXBP2;LST1;CAPG;GLRX;IKZF1;AIF1;LILRA5;RGS2;COTL1;AP1S2;TSPO;CD14;BID;LRRC25;NUP214”| truncated “CD86;FCN1;LYST;MRPL33;RGS2;ZFP36;LGALS2;FTH1;COTL1;AP1S2;TSPO;CD36;CCNL1;AMICA1;IER2;SERPINB1;APLP2;RNASET2;FOS”| truncated “CSF3R;IFITM2;SPI1;BCL2A1;STXBP2;LST1;LY96;GLRX;LYST;AIF1;LILRA5;LIMD2;RGS2;ARHGDIB;COTL1;BID;AMICA1;S100A11;VAS”| truncated “CSTB;CSTA;ANXA1;UQCRB;TXN;S100A8” …
## Error in DT::datatable(DF, caption = caption, extensions = "Buttons", : 'data' must be 2-dimensional (e.g. data frame or matrix)